# Source: https://scanpy-tutorials.readthedocs.io/en/latest/visualizing-marker-genes.html
import scanpy as sc
import pandas as pd
from matplotlib import rcParams
# --- Session setup -----------------------------------------------------------
# Figure defaults (80 dpi, 'viridis' colour map), logging verbosity level 2,
# and a dump of the installed package versions.
sc.set_figure_params(color_map='viridis', dpi=80)
sc.settings.verbosity = 2
sc.logging.print_versions()

# Reduced PBMC 68k example dataset bundled with scanpy.
pbmc = sc.datasets.pbmc68k_reduced()

# Overview UMAP on a 4x4 canvas, coloured by the `bulk_labels` annotation.
rcParams['figure.figsize'] = (4, 4)
sc.pl.umap(pbmc, s=50, color=['bulk_labels'])

# Flat list of marker genes. The order matters: the `var_group_positions`
# arguments used by later plots refer to positions in this list.
marker_genes = [
    'CD79A', 'MS4A1',
    'CD8A', 'CD8B',
    'LYZ', 'LGALS3', 'S100A8',
    'GNLY', 'NKG7', 'KLRB1',
    'FCGR3A', 'FCER1A', 'CST3',
]
# Plot marker genes per cluster using stacked violin plots.
# Stacked violins of the marker genes per `bulk_labels` category. Positions
# 7-8 of `marker_genes` (GNLY, NKG7) are bracketed under the label 'NK'.
ax = sc.pl.stacked_violin(
    pbmc,
    var_names=marker_genes,
    groupby='bulk_labels',
    var_group_positions=[(7, 8)],
    var_group_labels=['NK'],
)

# Same plot with the axes swapped and a dendrogram added
# (note that the categories get reordered).
ax = sc.pl.stacked_violin(
    pbmc,
    var_names=marker_genes,
    groupby='bulk_labels',
    swap_axes=True,
    var_group_positions=[(7, 8)],
    var_group_labels=['NK'],
    dendrogram=True,
)
## Dot plots
# Marker genes keyed by cell type; passing this dict as `var_names` groups the
# genes under the dict keys in the plot.
marker_genes_dict = {
    'B-cell': ['CD79A', 'MS4A1'],
    'T-cell': 'CD3D',
    'T-cell CD8+': ['CD8A', 'CD8B'],
    'NK': ['GNLY', 'NKG7'],
    'Myeloid': ['CST3', 'LYZ'],
    'Monocytes': ['FCGR3A'],
    'Dendritic': ['FCER1A'],
}

# Grouped marker genes (dict input), one row per `bulk_labels` category.
ax = sc.pl.dotplot(pbmc, var_names=marker_genes_dict, groupby='bulk_labels')

# Flat gene list with a dendrogram, per-gene standard scaling and explicit
# dot_min / dot_max bounds for the dot sizes.
ax = sc.pl.dotplot(
    pbmc, var_names=marker_genes, groupby='bulk_labels', dendrogram=True,
    dot_max=0.5, dot_min=0.3, standard_scale='var',
)

# Grouped genes again, with a dendrogram, per-gene scaling, a minimum dot
# size, the 'Blues' colour map and a wider figure.
ax = sc.pl.dotplot(
    pbmc, var_names=marker_genes_dict, groupby='bulk_labels', dendrogram=True,
    standard_scale='var', smallest_dot=40, color_map='Blues', figsize=(8, 5),
)

# Grouped by `louvain` clusters; positions 0-1 and 11-12 of `marker_genes`
# are bracketed as 'B cells' and 'dendritic'. The dendrogram is looked up
# under the key 'dendrogram_louvain'.
ax = sc.pl.dotplot(
    pbmc,
    var_names=marker_genes,
    groupby='louvain',
    var_group_positions=[(0, 1), (11, 12)],
    var_group_labels=['B cells', 'dendritic'],
    figsize=(12, 4),
    var_group_rotation=0,
    dendrogram='dendrogram_louvain',
)
# The matrixplot shows the mean expression of a gene in a group by category as
# a heatmap. In contrast to dotplot, the matrix plot can be used with corrected
# and/or scaled counts. By default, raw counts are used.
# Matrix plot of the grouped marker genes per `bulk_labels` category.
gs = sc.pl.matrixplot(pbmc, var_names=marker_genes_dict, groupby='bulk_labels')

# Same, with a dendrogram and per-gene standard scaling.
gs = sc.pl.matrixplot(
    pbmc, var_names=marker_genes_dict, groupby='bulk_labels',
    dendrogram=True, standard_scale='var',
)

# With use_raw=False only genes present in `pbmc.var_names` can be plotted,
# so restrict the marker list to those.
marker_genes_2 = [gene for gene in marker_genes if gene in pbmc.var_names]
gs = sc.pl.matrixplot(
    pbmc,
    var_names=marker_genes_2,
    groupby='bulk_labels',
    dendrogram=True,
    use_raw=False,
    vmin=-3,
    vmax=3,
    cmap='bwr',
    swap_axes=True,
    figsize=(5, 6),
)

# Heatmap of the grouped marker genes, cells grouped by `louvain` cluster.
ax = sc.pl.heatmap(pbmc, var_names=marker_genes_dict, groupby='louvain')

# Heatmap on the non-raw data with a diverging colour map clipped to [-3, 3];
# positions 0-1 and 11-12 of `marker_genes` are bracketed as 'B cells' and
# 'dendritic'.
ax = sc.pl.heatmap(
    pbmc,
    var_names=marker_genes,
    groupby='louvain',
    figsize=(5, 8),
    var_group_positions=[(0, 1), (11, 12)],
    use_raw=False,
    vmin=-3,
    vmax=3,
    cmap='bwr',
    var_group_labels=['B cells', 'dendritic'],
    var_group_rotation=0,
    dendrogram='dendrogram_louvain',
)
# Track plots read better on non-log counts, so exponentiate the raw values on
# a copy and leave `pbmc` itself untouched.
import numpy as np

ad = pbmc.copy()
# Only the stored entries (`.data`) are transformed; this presumably relies on
# `ad.raw.X` being a sparse matrix -- TODO confirm.
# NOTE(review): if the raw values are log1p-transformed, np.expm1 would be the
# exact inverse (np.exp yields count + 1 for the stored entries) -- confirm.
ad.raw.X.data = np.exp(ad.raw.X.data)

ax = sc.pl.tracksplot(ad, var_names=marker_genes, groupby='louvain')
import warnings

# Silence FutureWarning messages from here on.
warnings.simplefilter('ignore', FutureWarning)

# Rank genes for each `bulk_labels` category using logistic regression.
sc.tl.rank_genes_groups(pbmc, groupby='bulk_labels', method='logreg')

# Default ranking panels: 4x4 figures with the axes grid switched on.
rcParams['figure.figsize'] = (4, 4)
rcParams['axes.grid'] = True
sc.pl.rank_genes_groups(pbmc)

# --- Dot plots of the top-ranked genes ---
sc.pl.rank_genes_groups_dotplot(pbmc, n_genes=4)
# Top 15 genes, restricted to two of the groups.
axs = sc.pl.rank_genes_groups_dotplot(
    pbmc, n_genes=15, groups=['Dendritic', 'CD19+ B'],
)
# Same ranking shown against the `louvain` grouping and its dendrogram.
axs = sc.pl.rank_genes_groups_dotplot(
    pbmc, groupby='louvain', n_genes=4, dendrogram='dendrogram_louvain',
)

# --- Matrix plots of the top-ranked genes ---
axs = sc.pl.rank_genes_groups_matrixplot(
    pbmc, n_genes=3, standard_scale='var', cmap='Blues',
)
axs = sc.pl.rank_genes_groups_matrixplot(
    pbmc, n_genes=3, use_raw=False, vmin=-3, vmax=3, cmap='bwr',
)

# --- Stacked violin plots of the top-ranked genes ---
# These use the `ad` object (created earlier) rather than `pbmc`: its raw
# matrix is exp() of pbmc's raw matrix, which highlights the differences
# between the markers better.
sc.pl.rank_genes_groups_stacked_violin(ad, n_genes=3)
# row_palette='slateblue' draws all violins in the same colour.
sc.pl.rank_genes_groups_stacked_violin(ad, n_genes=3, row_palette='slateblue')
# `width` sets the violin width; with a figsize wider than the default it
# helps to keep the violins thin.
sc.pl.rank_genes_groups_stacked_violin(
    ad, n_genes=3, swap_axes=True, figsize=(6, 10), width=0.4,
)

# --- Heatmaps of the top-ranked genes ---
sc.pl.rank_genes_groups_heatmap(pbmc, n_genes=3, standard_scale='var')
sc.pl.rank_genes_groups_heatmap(
    pbmc, n_genes=3, use_raw=False, swap_axes=True,
    vmin=-3, vmax=3, cmap='bwr',
)
# Ten genes per group, so the individual gene labels are hidden.
sc.pl.rank_genes_groups_heatmap(
    pbmc, n_genes=10, use_raw=False, swap_axes=True, show_gene_labels=False,
    vmin=-3, vmax=3, cmap='bwr',
)

# --- Tracks plot of the top-ranked genes (on the exponentiated copy) ---
sc.pl.rank_genes_groups_tracksplot(ad, n_genes=3)
# Compute the hierarchical clustering (dendrogram) of the `bulk_labels`
# categories using 30 principal components. No `var_names` is passed here,
# so the clustering is not restricted to a gene list.
sc.tl.dendrogram(pbmc, groupby='bulk_labels', n_pcs=30)